#include "Out_layer.h"

/*
 * Cheap approximation of e^x based on the limit (1 + x/n)^n with n = 1024.
 *
 * The 1024th power is obtained by squaring the base ten times (2^10 = 1024),
 * so the routine costs one divide, one add, one compare and ten multiplies
 * and does not depend on a math library.
 *
 * x       exponent.
 * returns approximately e^x; exactly 1 for x == 0, and 0 for x <= -1024.
 *         Large positive x overflows to +inf (the base reaches 2 at x = 1024).
 *
 * NOTE(review): this definition has the same name and signature as the C
 * standard library's expf() from <math.h>; the name is kept because callers
 * rely on it -- confirm the shadowing is intentional.
 */
float expf(float x){
//#pragma HLS INLINE
    /* Float literal: keeps the addition in single precision instead of
     * promoting it to double. */
    x = 1.0f+x/1024;
    /* Below x = -1024 the base turns negative and the even power would come
     * out positive and arbitrarily large, whereas e^x is effectively zero
     * there.  Clamp so the result saturates at 0. */
    if(x < 0.0f){
        x = 0.0f;
    }
    /* Ten squarings raise the base to the power 2^10 = 1024. */
    x *= x; x *= x; x *= x; x *= x; x *= x;
    x *= x; x *= x; x *= x; x *= x; x *= x;
    return x;
}


/*
 * Returns the index of the most probable class under softmax.
 *
 * Only the winning index is reported, never the probabilities themselves.
 * Softmax is strictly increasing in each input, so the class with the
 * largest probability is simply the class with the largest input value.
 * The scan is therefore done directly on the raw inputs: this avoids the
 * exponentials and the normalising division, and it cannot be thrown off by
 * an exponential overflowing to inf (which would turn the normalised values
 * into NaN) or by every term underflowing to zero.
 *
 * input_feature  Category scores, one per class (assumes Category >= 1).
 * returns        index in [0, Category) of the largest score; the lowest
 *                index wins when several scores are equal.
 */
int softmax(float input_feature[Category])
{
//#pragma HLS INLINE
    int max_arg = 0;
    float max = input_feature[0];

    /* Strict '>' keeps the earliest index on ties. */
    for(int i=1; i<Category; i++){
        if(input_feature[i]>max){
            max = input_feature[i];
            max_arg = i;
        }
    }
    return max_arg;
}


/*
 * Averages each run of CONV_2_OUTPUT_SIZE consecutive input values into a
 * single output value (presumably global average pooling, going by the name).
 *
 * input_feature   flat array indexed as
 *                 [batch*CONV_2_TYPE*CONV_2_OUTPUT_SIZE + channel*CONV_2_OUTPUT_SIZE + k].
 * output_feature  flat array indexed as [batch*CONV_2_TYPE + channel];
 *                 every element is overwritten with the mean of its run.
 */
void GAP(float input_feature[image_Batch*CONV_2_TYPE*CONV_2_OUTPUT_SIZE],
        float output_feature[image_Batch*CONV_2_TYPE]
        )
{
        copy_input:
        for(int b=0; b<image_Batch; b++){
                /* Start of this image in the output / input arrays. */
                int out_base = b*CONV_2_TYPE;
                int in_base = out_base*CONV_2_OUTPUT_SIZE;
                for(int ch=0; ch<CONV_2_TYPE; ch++){
//#pragma HLS pipeline II=1
                        /* First element of the run belonging to this channel. */
                        int first = in_base+ch*CONV_2_OUTPUT_SIZE;
                        float total=0;
                        for(int k=0; k<CONV_2_OUTPUT_SIZE; k++){
                                total += input_feature[first+k];
                        }
                        output_feature[out_base+ch] = total/CONV_2_OUTPUT_SIZE;
                }
        }
}


/*
 * Final classification layer: a fully connected layer followed by softmax(),
 * producing one class index per image in the batch.
 *
 * input_feature   flat array, CONV_2_TYPE values per image, indexed as
 *                 [image*CONV_2_TYPE + feature].
 * weights         flat array indexed as [feature*Category + category].
 * bias            one value per category.
 * output_feature  receives, for each image, the value returned by softmax()
 *                 for that image's Category scores.
 *
 * All operands are first copied into local buffers, then the scores are
 * computed from those buffers.
 */
void OUTPUT_LAYER(float input_feature[image_Batch*CONV_2_TYPE],
            float weights[CONV_2_TYPE*Category],
            float bias[Category],
            int output_feature[image_Batch])
{
    float IBRAM[image_Batch][CONV_2_TYPE];
    float WBRAM[Category][CONV_2_TYPE];
    float biasBRAM[Category];
    /* Holds class indices, so it is an int buffer: no int<->float round trip
     * between softmax() and output_feature. */
    int OBRAM[image_Batch];

//#pragma HLS array_partition variable=WBRAM complete dim=1
//#pragma HLS array_partition variable=biasBRAM complete dim=0
//#pragma HLS array_partition variable=OBRAM complete dim=0

    copy_input:
    for(int i=0; i<image_Batch; i++){
        int offset = i*CONV_2_TYPE;
        for(int j=0; j<CONV_2_TYPE; j++){
//#pragma HLS pipeline II=1
            IBRAM[i][j] = input_feature[offset+j];
        }
    }

    /* Weights are stored transposed: the source is feature-major, the local
     * copy is category-major so one category's weights are contiguous. */
    copy_weight:
    for(int i=0; i<Category; i++){
        for(int j=0; j<CONV_2_TYPE; j++){
//#pragma HLS pipeline II=1
            WBRAM[i][j] = weights[j*Category+i];
        }
    }

    copy_bias:
    for(int i=0; i<Category; i++){
//#pragma HLS pipeline II=1
        biasBRAM[i] = bias[i];
    }

    for(int batch=0; batch<image_Batch; batch++){
        float accu[Category];
        for(int i=0; i<Category; i++){
//#pragma HLS pipeline II=1
            /* Seed the accumulator with the bias, then add the products in
             * ascending feature order; no per-iteration branch is needed. */
            float score = biasBRAM[i];
            for(int j=0; j<CONV_2_TYPE; j++){
//#pragma HLS unroll
                score += IBRAM[batch][j]*WBRAM[i][j];
            }
            accu[i] = score;
        }
        OBRAM[batch] = softmax(accu);
    }

    for(int i=0; i<image_Batch; i++){
//#pragma HLS pipeline II=1
        output_feature[i] = OBRAM[i];
    }

}
